字符串匹配算法BF和KMP总结
背景
来看一道leetcode题目:
Implement strStr().
Returns the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack.
让我们找到第一个子串的位置,这就是典型的字符串匹配问题。首先想到的就是暴力求解,时间复杂度O(mn).
BF算法实现
class Solution {
public:
int strStr(string haystack, string needle) {
if (haystack.size() < needle.size())
return -1;
for(int i = 0; i <= haystack.size() - needle.size(); i++) {
int flag = 1;
for (int j = 0; j < needle.size(); j++) {
if (haystack[i + j] != needle[j]) {
flag = 0;
break;
}
}
if (flag == 1) {
return i;
}
}
return -1;
}
};
但是我们可以发现,其实有些字符串我们已经匹配过了,就不需要再次匹配了,这就是经典的KMP算法,时间复杂度为O(m + n)。KMP算法的讲的比较好的,可以参考这个博客:http://www.cnblogs.com/c-cloud/p/3224788.html
KMP算法实现
class Solution {
public:
int strStr(string haystack, string needle) {
if (haystack.size() == 0) {
if (needle.size() == 0)
return 0;
else
return -1;
} else if (needle.size() == 0)
return 0;
if (haystack.size() < needle.size())
return -1;
vector<int> Next(needle.size());
getNext(needle, Next);
for (int i = 0; i < haystack.size(); ) {
// 如果剩下的字符串比目标字符串更短,那么直接返回-1.
if (haystack.size() - i < needle.size())
return -1;
int num = 0;
int step = 1;
for (int j = 0; j < needle.size(); j++) {
if (haystack[i + j] == needle[j]) {
num++;
} else
break;
}
if (num == needle.size())
return i;
if (num)
step = num - Next[num - 1];
i = i + step;
}
return -1;
}
void getNext(string needle, vector<int>& next) {
int k = 0;
next[0] = 0;
for (int i = 1; i < needle.size(); i++) {
while (k > 0 && needle[i] != needle[k])
k = next[k - 1];
if (needle[i] == needle[k])
k++;
next[i] = k;
}
}
};